To run an optimization I need for one district (Neukölln?) - residential buildings ~> children per building ~> children age 5/6 per block - capacity per school

Load all base data

sampled_buildings = read_csv('output/sampled_buildings.csv')
Parsed with column specification:
cols(
  OI = col_character(),
  long = col_double(),
  lat = col_double(),
  BLK = col_character()
)
bez = readOGR('download/RBS_OD_BEZ_2015_12.geojson', layer = 'OGRGeoJSON')
OGR data source with driver: GeoJSON 
Source: "download/RBS_OD_BEZ_2015_12.geojson", layer: "OGRGeoJSON"
with 13 features
It has 2 fields
blk = readOGR('download/RBS_OD_BLK_2015_12.geojson', layer = 'OGRGeoJSON')
OGR data source with driver: GeoJSON 
Source: "download/RBS_OD_BLK_2015_12.geojson", layer: "OGRGeoJSON"
with 15720 features
It has 4 fields
lor = readOGR('download/RBS_OD_LOR_2015_12.geojson', layer = 'OGRGeoJSON')
OGR data source with driver: GeoJSON 
Source: "download/RBS_OD_LOR_2015_12.geojson", layer: "OGRGeoJSON"
with 447 features
It has 8 fields
re_schulstand = readOGR('download/re_schulstand.geojson', layer = 'OGRGeoJSON')
OGR data source with driver: GeoJSON 
Source: "download/re_schulstand.geojson", layer: "OGRGeoJSON"
with 709 features
It has 20 fields

Load the huge route matrix

routen_matrix = read_csv('output/routen_matrix.csv') %>% mutate(distance=distance*60)
Parsed with column specification:
cols(
  src = col_character(),
  dst = col_character(),
  duration = col_double(),
  distance = col_double()
)

Load school capacities and population data

TODO - neue Daten von Torres

kapas = read_csv('download/anmeldezahlen.csv') %>% filter(grepl('G', Schulnummer)) %>% filter(!is.na(`Plätze`))
Parsed with column specification:
cols(
  Bezirk = col_character(),
  Schulnummer = col_character(),
  Schulname = col_character(),
  Plätze = col_integer(),
  Anmeldungen = col_character()
)
einwohner_lor = read_delim('download/EWR201512E_Matrix.csv', delim=';')
Parsed with column specification:
cols(
  .default = col_character(),
  ZEIT = col_integer(),
  STADTRAUM = col_integer(),
  E_E = col_number(),
  E_EM = col_number(),
  E_EW = col_number(),
  E_E50_55 = col_number(),
  E_E25U55 = col_number(),
  E_E55U65 = col_number(),
  E_E65U80 = col_number()
)
See spec(...) for full column specifications.

Wo haben wir Kapazitäten?

re_schulstand_df = re_schulstand %>% as.data.frame() %>% rename(lon=coords.x1, lat=coords.x2)
re_schulstand_df %>% filter(grepl('G', spatial_name)) %>% mutate(BEZIRK=enc2utf8(as.character(BEZIRK))) %>%
  group_by(BEZIRK) %>% summarise(`Anzahl Schulen` = n()) %>%
  rename(Bezirk=BEZIRK) %>% left_join(kapas %>% group_by(Bezirk) %>% summarise(`Mit Kapazität` = n()))
Joining, by = "Bezirk"

Wir wählen einen Bezirk, wo wir von allen Schulen Kapas haben.

re_schulstand %>% as.data.frame() %>% filter(grepl('G', spatial_name)) %>% mutate(BEZIRK=enc2utf8(as.character(BEZIRK))) %>% group_by(BEZIRK) %>% summarise(`Anzahl Schulen` = n()) %>%
  rename(Bezirk=BEZIRK) %>% left_join(kapas %>% group_by(Bezirk) %>% summarise(`Mit Kapazität` = n())) %>% filter(`Anzahl Schulen` == `Mit Kapazität`)
Joining, by = "Bezirk"
bezirk = 'Tempelhof-Schöneberg'
schulen_mit_kapa = kapas %>% filter(Bezirk == bezirk) %>% .$Schulnummer
schulen_mit_kapa
 [1] "07G01" "07G02" "07G03" "07G05" "07G06" "07G07" "07G10" "07G12"
 [9] "07G13" "07G14" "07G15" "07G16" "07G17" "07G18" "07G19" "07G20"
[17] "07G21" "07G22" "07G23" "07G24" "07G25" "07G26" "07G27" "07G28"
[25] "07G29" "07G30" "07G31" "07G32" "07G34" "07G35" "07G36" "07G37"
07G01

07G02

07G03

07G05

07G06

07G07

07G10

07G12

07G13

07G14

07G15

07G16

07G17

07G18

07G19

07G20

07G21

07G22

07G23

07G24

07G25

07G26

07G27

07G28

07G29

07G30

07G31

07G32

07G34

07G35

07G36

07G37
grundschulen = re_schulstand %>% as.data.frame() %>% filter(grepl('G', spatial_name)) %>% filter(BEZIRK == bezirk) %>% .$spatial_name
'In Anmeldeliste, fehlt in Schulstand'
[1] "In Anmeldeliste, fehlt in Schulstand"
In Anmeldeliste, fehlt in Schulstand
setdiff(schulen_mit_kapa, grundschulen)
character(0)
'In re_schulstand, fehlt in Anmeldeliste'
[1] "In re_schulstand, fehlt in Anmeldeliste"
In re_schulstand, fehlt in Anmeldeliste
setdiff(grundschulen, schulen_mit_kapa)
character(0)

OMG nur bei Mitte mappt das perfekt! Alle anderen erfordern Nachforschungen.

map = get_map('Berlin')
Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Berlin&zoom=10&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Berlin&sensor=false
#re_schulstand[re_schulstand$spatial_name %in% setdiff(grundschulen, schulen_mit_anmeldungen),] %>% View()
re_schulstand_df = re_schulstand_df %>% left_join(kapas, by=c('spatial_name'='Schulnummer'))
Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
character vector and factor, coercing into character vector
data = re_schulstand_df %>% filter(grepl('G', spatial_name)) %>% filter(BEZIRK==bezirk) %>% mutate(has.capa=is.na(`Plätze`))
ggmap(map) + geom_point(aes(lon, lat, color=has.capa), data=data) +
    coord_map(xlim=c(min(data$lon)-0.01, max(data$lon)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))

Ignorieren das Problem und tun so, als ob nur diese Schulen relevant sind:

relevant_schools = re_schulstand_df %>% filter(grepl('G', spatial_name)) %>% filter(BEZIRK==bezirk & !is.na(`Plätze`)) %>% .$spatial_name
relevant_schools
 [1] "07G01" "07G02" "07G03" "07G05" "07G06" "07G07" "07G10" "07G12"
 [9] "07G13" "07G14" "07G15" "07G16" "07G17" "07G18" "07G19" "07G20"
[17] "07G21" "07G22" "07G23" "07G24" "07G25" "07G26" "07G27" "07G28"
[25] "07G29" "07G30" "07G31" "07G32" "07G34" "07G35" "07G36" "07G37"
07G01

07G02

07G03

07G05

07G06

07G07

07G10

07G12

07G13

07G14

07G15

07G16

07G17

07G18

07G19

07G20

07G21

07G22

07G23

07G24

07G25

07G26

07G27

07G28

07G29

07G30

07G31

07G32

07G34

07G35

07G36

07G37

Kinder im Bezirk auf Gebäude aufteilen

  1. LORs in Neuköln
  2. Finden aller Wohngebebäude in diesen LORs
  3. Über einwohler_lor finden wir die durchschnittliche Kinderzahl pro _Wohngebäude

Mapping Bezirk->LOR->Block

df_bez = as.data.frame(bez)
df_lor = as.data.frame(lor)
df_blk = as.data.frame(blk)

LORs im Bezirk

bez_id = filter(df_bez, BEZNAME == bezirk)$BEZ
relevant_lors = df_lor %>% filter(BEZ == bez_id)
relevant_blks = df_blk %>% filter(BEZ == bez_id)
ggplot() + geom_path(aes(x=long, y=lat, group=group), data=lor[lor$BEZ == bez_id,]) + coord_map() + geom_path(aes(x=long, y=lat, group=group), data=bez, color='red')
Regions defined for each Polygons
Regions defined for each Polygons

Blöcke im Bezirk

ggplot() + geom_path(aes(x=long, y=lat, group=group), data=blk[blk$BEZ == bez_id,]) + coord_map() + geom_path(aes(x=long, y=lat, group=group), data=bez[bez$BEZ == bez_id,], color='red')
Regions defined for each Polygons
Regions defined for each Polygons

Wohngebäude in diesen LORs

Schon erledigt in sampled_buildings.

Alter

TODO neue Daten von Torres

relevant_ewr = einwohner_lor %>% select(RAUMID, E_E06_07) %>% filter(RAUMID %in% relevant_lors$PLR) %>%
  mutate(kids=as.numeric(gsub(',','.',E_E06_07))) %>% as.data.frame()

data = tidy(lor[lor$BEZ == bez_id,], region='PLR') %>% inner_join(relevant_ewr, by=c('id'='RAUMID'))
ggmap(map) + geom_polygon(aes(x=long, y=lat, group=group, fill=kids), data=data) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))

Kids in BLKs

For each LOR distribute the children according to the number of people living in that block

kids_in_blks = relevant_blks %>% group_by(PLR) %>% mutate(EinwRatio = Einw/sum(Einw)) %>% ungroup %>% left_join(relevant_ewr, by=c('PLR'='RAUMID')) %>% mutate(kids = EinwRatio*kids) %>% select(BEZ, PLR, BLK, Einw, kids) %>% as.data.frame()
Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
character vector and factor, coercing into character vector
row.names(kids_in_blks) = kids_in_blks$BLK

data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% inner_join(kids_in_blks, by=c('id'='BLK')) %>% mutate(kids=ifelse(kids==0, NA, kids), Einw=ifelse(Einw==0, NA, Einw))
Warning in inner_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
character vector and factor, coercing into character vector
0
[1] 0
ggmap(map) + geom_polygon(aes(x=long, y=lat, group=group, fill=kids), data=data) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))

Kapas

relevant_kapas = kapas %>% select(Schulnummer, Kapa=`Plätze`) %>% filter(Schulnummer %in% relevant_schools) %>% as.data.frame()
#row.names(relevant_kapas) = relevant_kapas$Schulnummer

Sanity

'Summe Kapas'
[1] "Summe Kapas"
Summe Kapas
relevant_kapas %>% .$Kapa %>% sum
[1] 2584
'Summe Kapas + extra'
[1] "Summe Kapas + extra"
Summe Kapas + extra
relevant_kapas %>% .$Kapa_extra %>% sum
[1] 0
'Anmeldungen'
[1] "Anmeldungen"
Anmeldungen
kapas %>% mutate(Anmeldungen = as.numeric(gsub('[^0-9]', '', Anmeldungen))) %>% filter(Schulnummer %in% relevant_schools) %>% .$Anmeldungen %>% sum
[1] 2752
'Kids laut Statistik'
[1] "Kids laut Statistik"
Kids laut Statistik
kids_in_blks$kids %>% sum
[1] 2855
relevant_ewr$kids %>% sum
[1] 2855

Calculate mean distances to school per block

find block of each residential building

residential_buildings_blocks = sampled_buildings %>% inner_join(df_blk) %>% filter(BEZ == bez_id)
Joining, by = "BLK"
Warning in inner_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
character vector and factor, coercing into character vector
residential_buildings_blocks
routes_from_blks = residential_buildings_blocks %>%
  left_join(routen_matrix %>% filter(dst %in% relevant_schools), by=c('OI'='src'))
head(routes_from_blks)

Which blocks are relevant because there are residential buildings (TODO and someone lives there)

data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% inner_join(routes_from_blks %>% group_by(BLK) %>% summarise(n=n()), by=c('id'='BLK'))
ggmap(map) + geom_polygon(aes(x=long, y=lat, group=group), fill='red', data=data) +
  #geom_point(aes(x=lon, y=lat), data=rb_df, color='black', size=0.01) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))

travel_from_blks = routes_from_blks %>% as.data.frame() %>% group_by(BLK, dst) %>% summarise(min=min(distance), avg=mean(distance), med=median(distance), max=max(distance)) %>% ungroup
travel_from_blks
travel_from_blks %>% write_csv('output/travel_from_blks.csv')

Color Blocks by avg distance

data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% left_join(travel_from_blks %>% group_by(BLK) %>% top_n(1, -avg), by=c('id'='BLK'))
ggmap(map) + geom_polygon(aes(x=long, y=lat, group=group, fill=-avg), data=data) +
  geom_point(aes(lon, lat), color='red', data = re_schulstand_df %>% filter(BEZIRK==bezirk & SCHULART=='Grundschule')) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))

travel_matrix = travel_from_blks %>% select(BLK, dst, avg) %>% spread(dst, avg)
dim(travel_matrix)
[1] 1012   33
travel_matrix

Select relevant data

optim_kapas = relevant_kapas
optim_kids_in_blks = kids_in_blks %>% filter(kids > 0) %>% inner_join(travel_matrix, by='BLK') %>% select(BLK, kids) %>% mutate(kids=kids*0.88)
Warning in inner_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
factor and character vector, coercing into character vector
nrow(optim_kids_in_blks)
[1] 1012
nrow(optim_kapas)
[1] 32
select_schools = as.character(optim_kapas$Schulnummer)
select_blks = as.character(optim_kids_in_blks$BLK)

optim_matrix = inner_join(optim_kids_in_blks, travel_matrix, by='BLK')[select_schools]

dim(optim_matrix)
[1] 1012   32
optim_kapas$Kapa %>% sum
[1] 2584
optim_kids_in_blks$kids %>% sum
[1] 2503.5
optim_matrix %>% write_csv('output/optim_matrix.csv')
optim_kapas %>% write_csv('output/optim_kapas.csv')
optim_kids_in_blks %>% write_csv('output/optim_blks.csv')

Naive solution (that doesn’t obey capacity constraints)

solution = optim_matrix %>% mutate(BLK=optim_kids_in_blks$BLK) %>% gather(school, dist, -BLK) %>% group_by(BLK) %>% top_n(1, -dist) %>% ungroup

optim_matrix %>% t %>% as.data.frame %>% summarise_each(funs(min)) %>% sum()
[1] 47204364
solines = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school')) %>% inner_join(cbind(as.data.frame(coordinates(blk[blk$BEZ == bez_id,])), blk[blk$BEZ == bez_id,]@data['BLK']))
Joining, by = "BLK"
Warning in inner_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
character vector and factor, coercing into character vector
data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% left_join(solution, by=c('id'='BLK'))
ggmap(map, darken = c(0.5, 'white')) + geom_polygon(aes(x=long, y=lat, group=group, fill=school), data=data) +
  geom_segment(aes(x=V1,y=V2,xend=lon,yend=lat), data=solines, size=0.3) +
  geom_point(aes(lon, lat), color='black', size=2, data = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school'))) +
  geom_point(aes(lon, lat, color=spatial_name), data = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school'))) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01)) +
  guides(color=F, fill=F)

solution %>% inner_join(travel_from_blks, by=c('BLK'='BLK', 'school'='dst')) %>% summarise_each("max", -BLK, -school, -dist)

JuMP

julia jump.jl
solution = read_csv('output/solution_jump.csv') %>% setNames(., select_schools) %>%
  mutate(BLK=select_blks) %>% gather(school, assigned, -BLK) %>% filter(assigned > 0)
Parsed with column specification:
cols(
  .default = col_double()
)
See spec(...) for full column specifications.
solines = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school')) %>% inner_join(cbind(as.data.frame(coordinates(blk[blk$BEZ == bez_id,])), blk[blk$BEZ == bez_id,]@data['BLK']))
Joining, by = "BLK"
Warning in inner_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
character vector and factor, coercing into character vector
data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% left_join(solution, by=c('id'='BLK'))
ggmap(map, darken = c(0.5, 'white')) + geom_polygon(aes(x=long, y=lat, group=group, fill=school), data=data) +
  geom_segment(aes(x=V1,y=V2,xend=lon,yend=lat), data=solines, size=0.3) +
  geom_point(aes(lon, lat), color='black', size=2, data = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school'))) +
  geom_point(aes(lon, lat, color=spatial_name), data = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school'))) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01)) +
  guides(color=F, fill=F)

solution %>% inner_join(travel_from_blks, by=c('BLK'='BLK', 'school'='dst')) %>% summarise_all(max)
sum(read_csv('output/solution_jump.csv') * optim_matrix)
Parsed with column specification:
cols(
  .default = col_double()
)
See spec(...) for full column specifications.
[1] 50098818
sum(read_csv('output/solution_jump.csv') * optim_matrix * optim_kids_in_blks$kids)
Parsed with column specification:
cols(
  .default = col_double()
)
See spec(...) for full column specifications.
[1] 134794056

Display

library(formattable)
solution %>% inner_join(optim_kids_in_blks, by='BLK') %>% inner_join(travel_from_blks, by=c('BLK'='BLK', 'school'='dst')) %>%
  group_by(school) %>% summarise(
    kids=sum(kids),
    num_blocks=n(),
    min_dist=min(min),
    avg_dist=mean((kids*avg)/sum(kids)),
    max_dist=max(max)
  ) %>%
  inner_join(relevant_kapas, by=c('school'='Schulnummer')) %>%
  mutate(
    utilization=kids/Kapa
  ) %>% select(
   Schule=school,
   `Blöcke`=num_blocks,
   Kapazität=Kapa,
   Kinder=kids,
   Auslastung=utilization,
   `Weg (min)`=min_dist,
   `Weg (Ø)`=avg_dist,
   `Weg (max)`=max_dist
  ) %>%
  formattable(
    list(
      Kinder = formatter("span", x ~ digits(x, 2)),
      Auslastung = formatter("span",
        style = x ~ style(color = ifelse(x < 1, "green", "red")),
        x ~ icontext(ifelse(x < 1, "ok", "remove"), percent(x))
      ),
      `Weg (Ø)` = proportion_bar("lightblue"),
      `Weg (min)` = proportion_bar("lightblue"),
      `Weg (max)` = proportion_bar("lightblue")
    )
  )
Schule Blöcke Kapazität Kinder Auslastung Weg (min) Weg (Ø) Weg (max)
07G01 13 66 65.87 99.80% 10394.854 41208.35 67553.24
07G02 25 98 68.45 69.84% 9854.407 43308.79 95793.37
07G03 18 73 72.98 99.97% 5612.879 34905.84 67585.35
07G05 17 78 77.32 99.13% 2974.415 34113.15 55922.32
07G06 23 55 54.90 99.83% 14486.592 47310.14 84725.57
07G07 26 81 79.26 97.85% 6858.002 58310.50 126217.21
07G10 27 112 111.78 99.81% 11013.279 37674.58 87048.43
07G12 21 75 73.61 98.15% 7133.251 32159.58 126987.39
07G13 21 82 81.59 99.50% 11938.918 32602.53 59753.91
07G14 32 78 77.91 99.88% 4114.720 28625.73 65652.98
07G15 32 104 103.74 99.75% 10521.086 49633.38 90947.69
07G16 33 104 97.74 93.98% 11396.049 51016.56 165876.56
07G17 29 100 99.71 99.71% 1236.955 33875.89 111973.53
07G18 14 44 43.70 99.31% 4134.006 23755.78 46592.26
07G19 34 112 111.42 99.48% 4551.512 56389.17 130002.55
07G20 27 81 80.72 99.65% 1193.574 35844.04 86727.51
07G21 22 72 71.01 98.62% 13109.427 31728.63 76416.08
07G22 30 91 87.74 96.41% 8272.397 35984.90 81386.13
07G23 16 50 48.81 97.62% 6689.752 81418.59 152893.48
07G24 28 75 74.51 99.35% 7090.940 40827.13 118576.96
07G25 26 75 74.73 99.64% 5507.963 31427.67 69206.39
07G26 23 52 43.19 83.06% 5428.220 33540.96 101684.31
07G27 26 75 74.25 99.00% 2532.286 45702.87 98958.30
07G28 59 75 74.76 99.68% 9342.737 55027.04 121515.06
07G29 91 104 103.74 99.75% 1138.862 56844.63 123198.30
07G30 45 72 71.42 99.19% 482.926 54391.29 146048.88
07G31 24 78 68.22 87.46% 5582.570 59090.95 143043.15
07G32 59 78 77.88 99.84% 4557.764 49076.43 263035.55
07G34 38 100 95.34 95.34% 21120.242 72165.91 133903.62
07G35 38 75 71.75 95.67% 5838.956 47407.20 88698.65
07G36 43 100 96.48 96.48% 9295.770 69304.17 288785.86
07G37 52 69 68.99 99.99% 1399.929 80731.00 133766.21

Daten für das Tool

write_rds(solution, 'app/data/init_solution.rds')
write_rds(subset(blk, BEZ == bez_id), 'app/data/blocks.rds')
write_rds(subset(re_schulstand_df, spatial_name %in% relevant_schools), 'app/data/schools.rds')
write_rds(subset(bez, BEZ == bez_id), 'app/data/bez.rds')

block_stats = optim_kids_in_blks %>% inner_join(travel_from_blks, by=c('BLK'='BLK')) %>% inner_join(relevant_kapas, by=c('dst'='Schulnummer')) %>% inner_join(re_schulstand_df, by=c('dst'='spatial_name'))
write_rds(block_stats, 'app/data/block_stats.rds')
---
title: "R Notebook"
output: html_notebook
---

```{r libs, include=F, warning=F}
library(readr)
library(rgdal)
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggmap)
library(purrr)
library(knitr)
library(broom)
library(maptools)
```

To run an optimization I need for one district (Neukölln?)
- residential buildings ~> children per building ~> children age 5/6 per block
- capacity per school

Load all base data

```{r load data}
sampled_buildings = read_csv('output/sampled_buildings.csv')
bez = readOGR('download/RBS_OD_BEZ_2015_12.geojson', layer = 'OGRGeoJSON')
blk = readOGR('download/RBS_OD_BLK_2015_12.geojson', layer = 'OGRGeoJSON')
lor = readOGR('download/RBS_OD_LOR_2015_12.geojson', layer = 'OGRGeoJSON')
re_schulstand = readOGR('download/re_schulstand.geojson', layer = 'OGRGeoJSON')
```

Load the huge route matrix

```{r}
routen_matrix = read_csv('output/routen_matrix.csv') %>% mutate(distance=distance*60)
```

Load school capacities and population data

TODO - neue Daten von Torres

```{r}
kapas = read_csv('download/anmeldezahlen.csv') %>% filter(grepl('G', Schulnummer)) %>% filter(!is.na(`Plätze`))
einwohner_lor = read_delim('download/EWR201512E_Matrix.csv', delim=';')
```

## Wo haben wir Kapazitäten?

```{r}
re_schulstand_df = re_schulstand %>% as.data.frame() %>% rename(lon=coords.x1, lat=coords.x2)
re_schulstand_df %>% filter(grepl('G', spatial_name)) %>% mutate(BEZIRK=enc2utf8(as.character(BEZIRK))) %>%
  group_by(BEZIRK) %>% summarise(`Anzahl Schulen` = n()) %>%
  rename(Bezirk=BEZIRK) %>% left_join(kapas %>% group_by(Bezirk) %>% summarise(`Mit Kapazität` = n()))
```

Wir wählen einen Bezirk, wo wir von allen Schulen Kapas haben.

```{r}
re_schulstand %>% as.data.frame() %>% filter(grepl('G', spatial_name)) %>% mutate(BEZIRK=enc2utf8(as.character(BEZIRK))) %>% group_by(BEZIRK) %>% summarise(`Anzahl Schulen` = n()) %>%
  rename(Bezirk=BEZIRK) %>% left_join(kapas %>% group_by(Bezirk) %>% summarise(`Mit Kapazität` = n())) %>% filter(`Anzahl Schulen` == `Mit Kapazität`)
```


```{r}
bezirk = 'Tempelhof-Schöneberg'
schulen_mit_kapa = kapas %>% filter(Bezirk == bezirk) %>% .$Schulnummer
schulen_mit_kapa
grundschulen = re_schulstand %>% as.data.frame() %>% filter(grepl('G', spatial_name)) %>% filter(BEZIRK == bezirk) %>% .$spatial_name
'In Anmeldeliste, fehlt in Schulstand'
setdiff(schulen_mit_kapa, grundschulen)
'In re_schulstand, fehlt in Anmeldeliste'
setdiff(grundschulen, schulen_mit_kapa)
```

OMG nur bei Mitte mappt das perfekt! Alle anderen erfordern Nachforschungen.

```{r}
map = get_map('Berlin')
```


```{r}
#re_schulstand[re_schulstand$spatial_name %in% setdiff(grundschulen, schulen_mit_anmeldungen),] %>% View()
re_schulstand_df = re_schulstand_df %>% left_join(kapas, by=c('spatial_name'='Schulnummer'))
```

```{r}
data = re_schulstand_df %>% filter(grepl('G', spatial_name)) %>% filter(BEZIRK==bezirk) %>% mutate(has.capa=is.na(`Plätze`))
ggmap(map) + geom_point(aes(lon, lat, color=has.capa), data=data) +
    coord_map(xlim=c(min(data$lon)-0.01, max(data$lon)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))
```

Ignorieren das Problem und tun so, als ob nur diese Schulen relevant sind:

```{r}
relevant_schools = re_schulstand_df %>% filter(grepl('G', spatial_name)) %>% filter(BEZIRK==bezirk & !is.na(`Plätze`)) %>% .$spatial_name
relevant_schools
```

## Kinder im Bezirk auf Gebäude aufteilen

1. LORs in Neuköln
1. Finden aller Wohngebebäude in diesen LORs
2. Über `einwohler_lor` finden wir die durchschnittliche Kinderzahl pro _Wohngebäude

### Mapping Bezirk->LOR->Block

```{r}
df_bez = as.data.frame(bez)
df_lor = as.data.frame(lor)
df_blk = as.data.frame(blk)
```

### LORs im Bezirk

```{r}
bez_id = filter(df_bez, BEZNAME == bezirk)$BEZ
relevant_lors = df_lor %>% filter(BEZ == bez_id)
relevant_blks = df_blk %>% filter(BEZ == bez_id)
```


```{r}
ggplot() + geom_path(aes(x=long, y=lat, group=group), data=lor[lor$BEZ == bez_id,]) + coord_map() + geom_path(aes(x=long, y=lat, group=group), data=bez, color='red')
```

### Blöcke im Bezirk

```{r}
ggplot() + geom_path(aes(x=long, y=lat, group=group), data=blk[blk$BEZ == bez_id,]) + coord_map() + geom_path(aes(x=long, y=lat, group=group), data=bez[bez$BEZ == bez_id,], color='red')
```


### Wohngebäude in diesen LORs

Schon erledigt in `sampled_buildings`.

### Alter

TODO neue Daten von Torres

```{r}
relevant_ewr = einwohner_lor %>% select(RAUMID, E_E06_07) %>% filter(RAUMID %in% relevant_lors$PLR) %>%
  mutate(kids=as.numeric(gsub(',','.',E_E06_07))) %>% as.data.frame()

data = tidy(lor[lor$BEZ == bez_id,], region='PLR') %>% inner_join(relevant_ewr, by=c('id'='RAUMID'))
ggmap(map) + geom_polygon(aes(x=long, y=lat, group=group, fill=kids), data=data) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))
```

### Kids in BLKs

For each LOR distribute the children according to the number of people living in that block

```{r}
kids_in_blks = relevant_blks %>% group_by(PLR) %>% mutate(EinwRatio = Einw/sum(Einw)) %>% ungroup %>% left_join(relevant_ewr, by=c('PLR'='RAUMID')) %>% mutate(kids = EinwRatio*kids) %>% select(BEZ, PLR, BLK, Einw, kids) %>% as.data.frame()
row.names(kids_in_blks) = kids_in_blks$BLK

data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% inner_join(kids_in_blks, by=c('id'='BLK')) %>% mutate(kids=ifelse(kids==0, NA, kids), Einw=ifelse(Einw==0, NA, Einw))
0
ggmap(map) + geom_polygon(aes(x=long, y=lat, group=group, fill=kids), data=data) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))
```

### Kapas

```{r}
relevant_kapas = kapas %>% select(Schulnummer, Kapa=`Plätze`) %>% filter(Schulnummer %in% relevant_schools) %>% as.data.frame()
#row.names(relevant_kapas) = relevant_kapas$Schulnummer
```

### Sanity

```{r}
'Summe Kapas'
relevant_kapas %>% .$Kapa %>% sum
'Summe Kapas + extra'
relevant_kapas %>% .$Kapa_extra %>% sum
'Anmeldungen'
kapas %>% mutate(Anmeldungen = as.numeric(gsub('[^0-9]', '', Anmeldungen))) %>% filter(Schulnummer %in% relevant_schools) %>% .$Anmeldungen %>% sum
'Kids laut Statistik'
kids_in_blks$kids %>% sum
relevant_ewr$kids %>% sum
```

### Calculate mean distances to school per block

find block of each residential building

```{r}
residential_buildings_blocks = sampled_buildings %>% inner_join(df_blk) %>% filter(BEZ == bez_id)
residential_buildings_blocks
```

```{r}
routes_from_blks = residential_buildings_blocks %>%
  left_join(routen_matrix %>% filter(dst %in% relevant_schools), by=c('OI'='src'))
head(routes_from_blks)
```

### Which blocks are relevant because there are residential buildings (TODO and someone lives there)

```{r}
data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% inner_join(routes_from_blks %>% group_by(BLK) %>% summarise(n=n()), by=c('id'='BLK'))
ggmap(map) + geom_polygon(aes(x=long, y=lat, group=group), fill='red', data=data) +
  #geom_point(aes(x=lon, y=lat), data=rb_df, color='black', size=0.01) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))
```


```{r}
travel_from_blks = routes_from_blks %>% as.data.frame() %>% group_by(BLK, dst) %>% summarise(min=min(distance), avg=mean(distance), med=median(distance), max=max(distance)) %>% ungroup
travel_from_blks
travel_from_blks %>% write_csv('output/travel_from_blks.csv')
```

Color Blocks by avg distance
```{r}
data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% left_join(travel_from_blks %>% group_by(BLK) %>% top_n(1, -avg), by=c('id'='BLK'))
ggmap(map) + geom_polygon(aes(x=long, y=lat, group=group, fill=-avg), data=data) +
  geom_point(aes(lon, lat), color='red', data = re_schulstand_df %>% filter(BEZIRK==bezirk & SCHULART=='Grundschule')) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))
```


```{r}
travel_matrix = travel_from_blks %>% select(BLK, dst, avg) %>% spread(dst, avg)
dim(travel_matrix)
travel_matrix
```

# Select relevant data

```{r}
optim_kapas = relevant_kapas
optim_kids_in_blks = kids_in_blks %>% filter(kids > 0) %>% inner_join(travel_matrix, by='BLK') %>% select(BLK, kids) %>% mutate(kids=kids*0.88)
nrow(optim_kids_in_blks)
nrow(optim_kapas)

select_schools = as.character(optim_kapas$Schulnummer)
select_blks = as.character(optim_kids_in_blks$BLK)

optim_matrix = inner_join(optim_kids_in_blks, travel_matrix, by='BLK')[select_schools]

dim(optim_matrix)

optim_kapas$Kapa %>% sum
optim_kids_in_blks$kids %>% sum

optim_matrix %>% write_csv('output/optim_matrix.csv')
optim_kapas %>% write_csv('output/optim_kapas.csv')
optim_kids_in_blks %>% write_csv('output/optim_blks.csv')
```

### Naive solution (that doesn't obey capacity constraints)

```{r}
solution = optim_matrix %>% mutate(BLK=optim_kids_in_blks$BLK) %>% gather(school, dist, -BLK) %>% group_by(BLK) %>% top_n(1, -dist) %>% ungroup

optim_matrix %>% t %>% as.data.frame %>% summarise_each(funs(min)) %>% sum()

solines = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school')) %>% inner_join(cbind(as.data.frame(coordinates(blk[blk$BEZ == bez_id,])), blk[blk$BEZ == bez_id,]@data['BLK']))

data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% left_join(solution, by=c('id'='BLK'))
ggmap(map, darken = c(0.5, 'white')) + geom_polygon(aes(x=long, y=lat, group=group, fill=school), data=data) +
  geom_segment(aes(x=V1,y=V2,xend=lon,yend=lat), data=solines, size=0.3) +
  geom_point(aes(lon, lat), color='black', size=2, data = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school'))) +
  geom_point(aes(lon, lat, color=spatial_name), data = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school'))) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01)) +
  guides(color=F, fill=F)
```

```{r}
solution %>% inner_join(travel_from_blks, by=c('BLK'='BLK', 'school'='dst')) %>% summarise_each("max", -BLK, -school, -dist)
```

# JuMP

```
julia jump.jl
```

```{r}
solution = read_csv('output/solution_jump.csv') %>% setNames(., select_schools) %>%
  mutate(BLK=select_blks) %>% gather(school, assigned, -BLK) %>% filter(assigned > 0)

solines = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school')) %>% inner_join(cbind(as.data.frame(coordinates(blk[blk$BEZ == bez_id,])), blk[blk$BEZ == bez_id,]@data['BLK']))

data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% left_join(solution, by=c('id'='BLK'))
ggmap(map, darken = c(0.5, 'white')) + geom_polygon(aes(x=long, y=lat, group=group, fill=school), data=data) +
  geom_segment(aes(x=V1,y=V2,xend=lon,yend=lat), data=solines, size=0.3) +
  geom_point(aes(lon, lat), color='black', size=2, data = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school'))) +
  geom_point(aes(lon, lat, color=spatial_name), data = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school'))) +
  coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01)) +
  guides(color=F, fill=F)
```

```{r}
solution %>% inner_join(travel_from_blks, by=c('BLK'='BLK', 'school'='dst')) %>% summarise_all(max)
```

```{r}
sum(read_csv('output/solution_jump.csv') * optim_matrix)
sum(read_csv('output/solution_jump.csv') * optim_matrix * optim_kids_in_blks$kids)
```

## Display

```{r}
library(formattable)
```

```{r}
solution %>% inner_join(optim_kids_in_blks, by='BLK') %>% inner_join(travel_from_blks, by=c('BLK'='BLK', 'school'='dst')) %>%
  group_by(school) %>% summarise(
    kids=sum(kids),
    num_blocks=n(),
    min_dist=min(min),
    avg_dist=mean((kids*avg)/sum(kids)),
    max_dist=max(max)
  ) %>%
  inner_join(relevant_kapas, by=c('school'='Schulnummer')) %>%
  mutate(
    utilization=kids/Kapa
  ) %>% select(
   Schule=school,
   `Blöcke`=num_blocks,
   Kapazität=Kapa,
   Kinder=kids,
   Auslastung=utilization,
   `Weg (min)`=min_dist,
   `Weg (Ø)`=avg_dist,
   `Weg (max)`=max_dist
  ) %>%
  formattable(
    list(
      Kinder = formatter("span", x ~ digits(x, 2)),
      Auslastung = formatter("span",
        style = x ~ style(color = ifelse(x < 1, "green", "red")),
        x ~ icontext(ifelse(x < 1, "ok", "remove"), percent(x))
      ),
      `Weg (Ø)` = proportion_bar("lightblue"),
      `Weg (min)` = proportion_bar("lightblue"),
      `Weg (max)` = proportion_bar("lightblue")
    )
  )
```


# Daten für das Tool
```{r}
write_rds(solution, 'app/data/init_solution.rds')
write_rds(subset(blk, BEZ == bez_id), 'app/data/blocks.rds')
write_rds(subset(re_schulstand_df, spatial_name %in% relevant_schools), 'app/data/schools.rds')
write_rds(subset(bez, BEZ == bez_id), 'app/data/bez.rds')

block_stats = optim_kids_in_blks %>% inner_join(travel_from_blks, by=c('BLK'='BLK')) %>% inner_join(relevant_kapas, by=c('dst'='Schulnummer')) %>% inner_join(re_schulstand_df, by=c('dst'='spatial_name'))
write_rds(block_stats, 'app/data/block_stats.rds')
```

